------------------------------------------------------------------------------------------------------------------------------------
      name:  plog_896
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/analysis/summarystats.log
  log type:  text
 opened on:  27 Nov 2024, 17:59:28

. *************************************************************************************************************** 
. * NG: 9/18/2019
. * summarystats.do
. * Creates the descriptive (summary) statistics table
. *
. * NG Edit: 01/20/2020, Update to match with the new name of cohort, entrycohort
. * JR, 4/24/2020: Rewrite to work with microdata
. 
. 
. clear

.   
. cap project, doinfo

. if _rc==0 {
.         local pdir "`r(pdir)'"                                                      // the project's main dir.
.         local dofile "`r(dofile)'"                                                  // do-file's stub name
.         local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}}       // a signature in notes
.         local doasproject=1
. }

. else {
.         local pdir "~/GRscarring"
.         local dofile "summarystats"
.         local doasproject=0
. }

. 
. set more off

. local rootdir "`pdir'"

. local thisdir "`pdir'"

. 
. local prepdata "`pdir'/scratch"

. local scratch "`pdir'/scratch"

. local rawdata "`pdir'/rawdata"

. local output "`pdir'/results"

. 
. // Declare the input files this do-file uses (only when run under -project-); the prep of the big CPS data follows below
. 
. if `doasproject'==1 {
.         project, uses("`scratch'/statepop.dta")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/statepop.dta" filesig(3467904037:115254
> )
.         project, uses("`scratch'/unrate_national.dta")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/unrate_national.dta" filesig(1052845851
> :51188)
.         project, uses("`scratch'/unrate_state.dta")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/unrate_state.dta" filesig(3849316085:13
> 84205)
.   project, uses("`prepdata'/extractcps.dta.gz")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/extractcps.dta.gz" filesig(1117482818:1
> 425161591)
.   project, uses("`prepdata'/extractorg_morg.dta.gz")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/extractorg_morg.dta.gz" filesig(8195742
> 91:643171877)
.   project, uses("`prepdata'/combinecollapse_yca2s.dta")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/combinecollapse_yca2s.dta" filesig(9104
> 81610:162185009)
. }

. 
. 
. *Prepare population and unemployment rates to merge on, at year-cohort and year-state-cohort levels
.  // Make national version of population dataset
.   use `scratch'/statepop

.   collapse (sum) pop, by(year)

.   tempfile natlpop

.   save `natlpop'
file /tmp/St2868349.000004 saved as .dta format

.  // Make annual versions of unemployment rates
.   use `scratch'/unrate_national

.   isid yearmo

.   sort yearmo

.   gen year=yofd(dofm(yearmo))

.   bys year (yearmo): keep if _n==_N
(805 observations deleted)

.   keep year ur_nat_annual ur_nat_3yr_avg

.   label var ur_nat_annual "Unemployment rate (national)"

.   tempfile natlur

.   save `natlur'
file /tmp/St2868349.000005 saved as .dta format

.   merge 1:1 year using `natlpop', nogen assert(1 3)

    Result                      Number of obs
    -----------------------------------------
    Not matched                            25
        from master                        25  
        from using                          0  

    Matched                                49  
    -----------------------------------------

.   tempfile popur_n

.   save `popur_n'
file /tmp/St2868349.000006 saved as .dta format

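.   // Have UR for more years than pop (merge above: 74 UR years, only 49 matched to pop). Originally noted as "UR 1947-2017, pop only 1970-2017"; those ranges no longer match the merge counts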
.  // Make state version of unemployment rate  
.   use `scratch'/unrate_state

.   isid fipsst yearmo

.   sort fipsst yearmo

.   gen year=yofd(dofm(yearmo))

.   bys fipsst year (yearmo): keep if _n==_N
(24,735 observations deleted)

.   keep fipsst year ur_st_annual ur_st_3yr_avg

.   label var ur_st_annual "Unemployment rate (state)"

.   tempfile stateur

.   save `stateur'
file /tmp/St2868349.000007 saved as .dta format

.   merge 1:1 fipsst year using `scratch'/statepop, nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                           408
        from master                       102  
        from using                        306  

    Matched                             2,193  
    -----------------------------------------

.   sort fipsst year

.   bysort fipsst: replace state_name = state_name[1] if missing(state_name)
(102 real changes made)

.  merge m:1 year using `popur_n', keepusing(year ur_nat_annual ur_nat_3yr_avg) assert(2 3) keep(3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                             2,601  
    -----------------------------------------

. 
.   tempfile popur_s

.   save `popur_s'
file /tmp/St2868349.000008 saved as .dta format

.   
.  
.  // Unzip extractcps to a temporary .dta, load it, then delete the uncompressed copy (findpartners is not used in this file)
.   !zcat `prepdata'/extractcps.dta.gz > `prepdata'/extractcps.dta 


.   use `prepdata'/extractcps.dta

.   !rm `prepdata'/extractcps.dta


. 
. *** COHORT: ****
. gen birthcohort=(year-age)

. 
. rename wgt_composite bigcpswgt

. rename stfips fipsst

. 
. ************************************************
. *********** 1: SAMPLE RESTRICTIONS *************
. ************************************************
. keep if age>=22 & age<=40
(33,871,674 observations deleted)

. keep if birthcohort>=1948
(1,108,158 observations deleted)

.   // Drop last year data, which is not yet complete
.   drop if year>2019
(0 observations deleted)

. 
. ************************************************
. *********** 2: MAKE SOME VARIABLES *************
. ************************************************
. *  Weekly hours, w/ zeros
. replace hourslw=0 if hourslw==. & pemlr~=-1
(4,433,525 real changes made)

. *  Weekly hours, w/o zeros 
. gen hourslw_pos=hourslw if hourslw>0 & hourslw<.
(4,552,034 missing values generated)

. gen byte ed_hs=(educ5==2) if educ5<.

. gen byte ed_scol=(educ5==3) if educ5<.

. gen byte ed_ba=(educ5==4) if educ5<.

. gen byte ed_grad=(educ5==5) if educ5<.

. 
. gen byte educ2=(inlist(educ5, 4, 5)) if educ5<.

. keep if educ2==1
(12,626,877 observations deleted)

. 
.   keep if bigcpswgt<.
(5,796 observations deleted)

.   gen entrycohort=birthcohort+22 if educ2==1

.   replace entrycohort=birthcohort+18 if educ2==0
(0 real changes made)

.   label var birthcohort "Year of Birth"

.   label var entrycohort "Year of entry on the labor market, depending on level of education"

.       merge m:1 fipsst year using `popur_s', assert(2 3) keep(3) nogen
(variable fipsst was byte, now double to accommodate using data's values)
(variable year was int, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         4,820,494  
    -----------------------------------------

.       rename ur_nat_annual ur_nat

.       rename ur_st_annual ur_st

.      // And merge on age-22 UR
.       rename year origyr

.       gen year=entrycohort

.       merge m:1 fipsst year using `popur_s', keepusing(year ur_nat_annual ur_st_annual) keep(1 3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         4,820,494  
    -----------------------------------------

.       rename ur_nat_annual ur0_nat_22  

.       label var ur0_nat_22 "UR (natl) at age 22"

.       rename ur_st_annual ur0_st_22

.       label var ur0_st_22 "UR (state) at age 22"  

.       drop year

.       rename origyr year

.      // Make a consolidated age-22 UR that uses the national rate before 1976 and the state rate from 1976 onward
.      // This makes it possible to include earlier cohorts in the analyses that control for the age-22 UR.
.       gen ur0_22=ur0_st_22
(585,804 missing values generated)

.       assert ur0_22==. if birthcohort<1954

.       replace ur0_22=ur0_nat_22 if birthcohort<1954
(585,804 real changes made)

.       label var ur0_22 "Age-22 UR (nat pre-1976, state 1976-)"

. 
. 
.   gen ur0=ur0_22 if educ2==1

.   gen ur0_nat=ur0_nat_22 if educ2==1

.     
.   // Subsample that is age 30+ when the GR hit for the educated (educ2==1: birthcohort<=1978), age 26+ otherwise (educ2==0: birthcohort<=1982)
.   gen estsamp=(birthcohort<=1978 & ur0<.) if educ2==1

.   replace estsamp=(birthcohort<=1982 & ur0<.) if educ2==0
(0 real changes made)

.   assert estsamp==(birthcohort>=1948 & birthcohort<=1978) if educ2==1 

.   assert estsamp==0 if educ2==1 & (birthcohort<1948 | birthcohort>1978)

.   assert estsamp==(birthcohort>=1952 & birthcohort<=1982) if educ2==0
(null assertion)

.   assert estsamp==0 if educ2==0 & (birthcohort<1952 | birthcohort>1982)
(null assertion)

.   gen estsampb=(ur0<.)

.   assert estsampb==(birthcohort>=1948 & birthcohort<=1997) if educ2==1 

.   assert estsampb==(birthcohort>=1952 & birthcohort<=2001) if educ2==0 
(null assertion)

.   // create inverse mills ratios
.   // Need to merge on education fraction
.   merge m:1 year birthcohort educ2 fipsst using `prepdata'/combinecollapse_yca2s, keepusing(edfr_yc edfr_ycs)
(label stfips_label already defined)

    Result                      Number of obs
    -----------------------------------------
    Not matched                       235,961
        from master                         0  (_merge==1)
        from using                    235,961  (_merge==2)

    Matched                         4,820,494  (_merge==3)
    -----------------------------------------

.   assert _merge==3 if educ2==1 & birthcohort>=1948 & age>=22 & age<=40 & year>=1979

.   drop if _merge==2
(235,961 observations deleted)

.   gen imr_yc=normalden(invnormal(edfr_yc))/edfr_yc if educ2==1

.   replace imr_yc=normalden(invnormal(1-edfr_yc))/(1-edfr_yc) if educ2==0
(0 real changes made)

.   
.   gen imr_ycs=normalden(invnormal(edfr_ycs))/edfr_ycs if educ2==1

.   replace imr_ycs=normalden(invnormal(1-edfr_ycs))/(1-edfr_ycs) if educ2==0
(0 real changes made)

. 
. 
. 
. // Summary Statistics Table 
. local bigcpsvars "ed_grad empl birthcohort age entrycohort uhours ur_st ur0 edfr_yc imr_yc edfr_ycs imr_ycs"

. estpost tabstat `bigcpsvars' [aw=bigcpswgt], stat(mean sd min p10 p50 p90 max) col(stat)

Summary statistics: mean sd min p10 p50 p90 max
     for variables: ed_grad empl birthcohort age entrycohort uhours ur_st ur0 edfr_yc imr_yc edfr_ycs imr_ycs

             |   e(mean)      e(sd)     e(min)     e(p10)     e(p50)     e(p90)     e(max) 
-------------+-----------------------------------------------------------------------------
     ed_grad |  .2869407   .4523337          0          0          0          1          1 
        empl |  .8539181   .3531883          0          0          1          1          1 
 birthcohort |   1970.82   12.04555       1948       1954       1971       1987       1997 
         age |  31.33752   5.155044         22         24         31         39         40 
 entrycohort |   1992.82   12.04555       1970       1976       1993       2009       2019 
      uhours |  41.18804   11.19789         -9         30         40         54         99 
       ur_st |  5.997149   2.009884        2.3        3.8        5.5        8.8       17.8 
         ur0 |  6.233142   1.900509        2.3        4.2        5.8        8.7       17.8 
     edfr_yc |  .2960512   .0622594   .1025652   .2244388   .2925417   .3802295   .4279896 
      imr_yc |  1.177451   .1412042   .9169041   1.001568   1.174879   1.334386   1.742961 
    edfr_ycs |  .3104538   .0918822   .0032405   .2004951   .3025778    .430333   .8147043 
     imr_ycs |  1.155887   .1974169   .3279648   .9128832   1.153538   1.398429   3.026369 

. eststo bigcps

. 
. 
. // Now prepare ORG file
. !zcat `prepdata'/extractorg_morg.dta.gz > `prepdata'/extractorg_morg.dta


. use `prepdata'/extractorg_morg.dta, clear

. !rm `prepdata'/extractorg_morg.dta


. *** COHORT: ****
. gen birthcohort=(year-age)

. ************************************************
. *********** 1: SAMPLE RESTRICTIONS *************
. ************************************************
. keep if age>=22 & age<=40
(8,516,295 observations deleted)

. keep if age>=22 & age<=40
(0 observations deleted)

. keep if birthcohort>=1948
(276,773 observations deleted)

.   // Drop last year data, which is not yet complete
.   drop if year>2019
(0 observations deleted)

. 
. rename gestfips fipsst

. 
. ************************************************
. *********** 2: MAKE SOME VARIABLES *************
. ************************************************
. * educ4 (four attainment groups from educ92), then educ5 (educ4 with MA+ split out as a fifth group)
. gen educ4=.
(4,376,810 missing values generated)

. replace educ4=1 if inlist(educ92,0,1,2,3,4,5,6,7,8)==1
(491,629 real changes made)

. replace educ4=2 if inlist(educ92,9)==1
(1,472,787 real changes made)

. replace educ4=3 if inlist(educ92,10,11,12)==1
(1,200,418 real changes made)

. replace educ4=4 if inlist(educ92,13,14,15,16)==1
(1,211,976 real changes made)

. gen educ5=educ4

. replace educ5=5 if inlist(educ92,14,15,16)
(354,493 real changes made)

. drop if educ5==.
(0 observations deleted)

. label define attain_l 1 "LTHS" 2 "HS" 3 "Some col." 4 "BA" 5 "MA+"

. label values educ5 attain_l

. gen byte educ2=(inlist(educ5, 4, 5)) if educ5<.

. keep if educ2==1
(3,164,834 observations deleted)

. 
. gen orgwgt_rw_l=orgwgt if rw_l<.
(267,228 missing values generated)

. gen earnwt_rw_l=earnwt if rw_l<.
(267,228 missing values generated)

. 
. 
. ******** Sex ******
. replace sex = 0 if sex == 2
(640,936 real changes made)

. tab sex

        Sex |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |    640,936       52.88       52.88
       Male |    571,040       47.12      100.00
------------+-----------------------------------
      Total |  1,211,976      100.00

. 
. local orgvars "rw_l"

. estpost tabstat `orgvars' [aw=orgwgt], stat(mean sd min p10 p50 p90 max) col(stat)

Summary statistics: mean sd min p10 p50 p90 max
     for variables: rw_l

             |   e(mean)      e(sd)     e(min)     e(p10)     e(p50)     e(p90)     e(max) 
-------------+-----------------------------------------------------------------------------
        rw_l |  3.056772   .5251189   .1912557   2.382788   3.061717    3.73428   5.375453 

. eststo org

. 
. 
. esttab bigcps using `output'/summarystats_statdes.txt, title("Summary Statistics")   replace ///
> cells("mean(label(Mean)fmt(%9.3f)) sd(label(Std.Dev.)fmt(%9.3f)) min(label(Min)fmt(%9.2f)) max(label(Max)fmt(%9.2f)) p10(label(p10
> )fmt(%9.2f)) p50(label(p50)fmt(%9.2f)) p90(label(p90)fmt(%9.2f))")
(output written to /accounts/projects/jr_ra/GRscarring/erratum/results/summarystats_statdes.txt)

. 
. esttab org using `output'/summarystats_statdes.txt, title("Summary Statistics")   append ///
> cells("mean(label(Mean)fmt(%9.3f)) sd(label(Std.Dev.)fmt(%9.3f)) min(label(Min)fmt(%9.2f)) max(label(Max)fmt(%9.2f)) p10(label(p10
> )fmt(%9.2f)) p50(label(p50)fmt(%9.2f)) p90(label(p90)fmt(%9.2f))")
(output written to /accounts/projects/jr_ra/GRscarring/erratum/results/summarystats_statdes.txt)

. 
. 
. * Create Table1b with full data (currently disabled: the commands below are commented out)
. *estpost tabstat $list2, stat(mean sd min max) col(stat) 
. *esttab . using `output'/summarystats_statdes.txt, title("Summary Statistics")   replace ///
> *cells("mean(label(Mean)fmt(%9.3f)) sd(label(Std.Dev.)fmt(%9.3f)) min(label(Min)fmt(%9.2f)) max(label(Max)fmt(%9.2f))")
. 
. save `prepdata'/summarystats_statdes.dta, replace
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/summarystats_statdes.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/summarystats_statdes.dta saved

.   
. if `doasproject'==1 {
.       project, creates(`output'/summarystats_statdes.txt)
project GRscar_erratum > do-file creates: "results/summarystats_statdes.txt" filesig(2862160549:3054)
.       project, creates(`prepdata'/summarystats_statdes.dta)
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/summarystats_statdes.dta" filesig(91
> 3000157:198805591)
. }

. 
end of do-file
      name:  plog_896
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/analysis/summarystats.log
  log type:  text
 closed on:  27 Nov 2024, 18:04:13
------------------------------------------------------------------------------------------------------------------------------------
